{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Import packages\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "import getpass\n",
    "import socket\n",
    "import json\n",
    "import zipfile\n",
    "import io\n",
    "import math\n",
    "import os\n",
    "import shutil\n",
    "import pprint\n",
    "import re\n",
    "import time\n",
    "from statistics import mean\n",
    "from requests.auth import HTTPBasicAuth"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Select data sets and determine version numbers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create dictionary of data set parameters we'll use in our access API command below. We'll start with data set IDs (e.g. ATL07) of interest here, also known as \"short name\".\n",
    "\n",
    "data_dict = {\n",
    "    'sea_ice_fb' : {'short_name' : 'ATL10'},\n",
    "    'sea_ice_height' : {'short_name' : 'ATL07'},\n",
    "    'ist' : {'short_name' : 'MOD29'},\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get json response from CMR collection metadata to grab version numbers and add to data_dict\n",
    "\n",
    "for i in range(len(data_dict)):\n",
    "    cmr_collections_url = 'https://cmr.earthdata.nasa.gov/search/collections.json'\n",
    "    response = requests.get(cmr_collections_url, params=list(data_dict.values())[i])\n",
    "    results = json.loads(response.content) \n",
    "\n",
    "    # Find all instances of 'version_id' in metadata and print most recent version number\n",
    "    versions = [el['version_id'] for el in results['feed']['entry']]\n",
    "    versions = [i for i in versions if not any(c.isalpha() for c in i)]\n",
    "    data_dict[list(data_dict.keys())[i]]['version'] = max(versions)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Select time and area of interest\n",
    "\n",
    "Data granules are returned based on a spatial bounding box and temporal range."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bounding Box spatial parameter in 'W,S,E,N' decimal degrees format \n",
    "\n",
    "bounding_box = '140,72,153,80' \n",
    "\n",
    "#add bounding_box to each data set dictionary\n",
    "for k, v in data_dict.items(): data_dict[k]['bounding_box'] = bounding_box"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "jupyter": {
     "source_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "#Input temporal range in 'YYYY-MM-DDThh:mm:ssZ,YYYY-MM-DDThh:mm:ssZ' format\n",
    "\n",
    "temporal = '2019-03-23T00:00:00Z,2019-03-23T23:59:59Z'\n",
    "\n",
    "#add temporal to each data set dictionary\n",
    "for k, v in data_dict.items(): data_dict[k]['temporal'] = temporal"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Determine how many granules exist over this time and area of interest, as well as the average size and total volume of those granules"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Query number of granules (paging over results)\n",
    "granule_search_url = 'https://cmr.earthdata.nasa.gov/search/granules'\n",
    "for i in range(len(data_dict)):\n",
    "    params = {\n",
    "        'short_name': list(data_dict.values())[i]['short_name'],\n",
    "        'version': list(data_dict.values())[i]['version'],\n",
    "        'bounding_box': bounding_box,\n",
    "        'temporal': temporal,\n",
    "        'page_size': 100,\n",
    "        'page_num': 1\n",
    "    }\n",
    "    granules = []\n",
    "    headers={'Accept': 'application/json'}\n",
    "    while True:\n",
    "        response = requests.get(granule_search_url, params=params, headers=headers)\n",
    "        results = json.loads(response.content)\n",
    "\n",
    "        if len(results['feed']['entry']) == 0:\n",
    "            # Out of results, so break out of loop\n",
    "            break\n",
    "\n",
    "        # Collect results and increment page_num\n",
    "        granules.extend(results['feed']['entry'])\n",
    "        params['page_num'] += 1\n",
    "    print('There are', len(granules), 'granules of', list(data_dict.values())[i]['short_name'], 'version', list(data_dict.values())[i]['version'], 'over my area and time of interest.')\n",
    "    for k, v in data_dict.items(): data_dict[k]['gran_num'] = len(granules)\n",
    "    granule_sizes = [float(granule['granule_size']) for granule in granules]\n",
    "    print(f'The average size of each granule is {mean(granule_sizes):.2f} MB and the total size of all {len(granules)} granules is {sum(granule_sizes):.2f} MB')\n",
    "    print()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note that subsetting, reformatting, or reprojecting can alter the size of the granules if those services are applied to your request."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Select subsetting, reformatting, and reprojection options for each dataset"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Input Earthdata Login credentials\n",
    "\n",
    "An Earthdata Login account is required to access data from the NSIDC DAAC. If you do not already have an Earthdata Login account, visit http://urs.earthdata.nasa.gov to register."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "uid = '' # Enter Earthdata Login user name\n",
    "pswd = getpass.getpass('Earthdata Login password: ') # Input and store Earthdata Login password\n",
    "email = '' # Enter email associated with Earthata Login account"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The NSIDC DAAC supports customization services on many of our NASA Earthdata mission collections. See the Customize and Access Data notebook to query the subsetting, reformatting, and reprojection service options available for your data set of interest. Since we already know these options, we'll add our subsetting requests directly into our data dictionary. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Spatial and temporal subsetting for ATL10\n",
    "\n",
    "data_dict['sea_ice_fb']['bbox'] = bounding_box\n",
    "data_dict['sea_ice_fb']['time'] = '2019-03-23T00:00:00,2019-03-23T23:59:59'\n",
    "\n",
    "# Spatial and temporal subsetting for ATL07\n",
    "\n",
    "data_dict['sea_ice_height']['bbox'] = bounding_box\n",
    "data_dict['sea_ice_height']['time'] = '2019-03-23T00:00:00,2019-03-23T23:59:59'\n",
    "\n",
    "# Spatial subsetting and polar stereographic reprojection for MOD29\n",
    "\n",
    "data_dict['ist']['bbox'] = bounding_box"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now let's select a subset of variables. We'll use these primary variables of interest for the ICESat-2 ATL07 product:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#ATL07\n",
    "#Use only strong beams\n",
    "\n",
    "data_dict['sea_ice_height']['coverage'] = '/gt1l/sea_ice_segments/delta_time,\\\n",
    "/gt1l/sea_ice_segments/latitude,\\\n",
    "/gt1l/sea_ice_segments/longitude,\\\n",
    "/gt1l/sea_ice_segments/heights/height_segment_confidence,\\\n",
    "/gt1l/sea_ice_segments/heights/height_segment_height,\\\n",
    "/gt1l/sea_ice_segments/heights/height_segment_quality,\\\n",
    "/gt1l/sea_ice_segments/heights/height_segment_surface_error_est,\\\n",
    "/gt2l/sea_ice_segments/delta_time,\\\n",
    "/gt2l/sea_ice_segments/latitude,\\\n",
    "/gt2l/sea_ice_segments/longitude,\\\n",
    "/gt2l/sea_ice_segments/heights/height_segment_confidence,\\\n",
    "/gt2l/sea_ice_segments/heights/height_segment_height,\\\n",
    "/gt2l/sea_ice_segments/heights/height_segment_quality,\\\n",
    "/gt2l/sea_ice_segments/heights/height_segment_surface_error_est,\\\n",
    "/gt3l/sea_ice_segments/delta_time,\\\n",
    "/gt3l/sea_ice_segments/latitude,\\\n",
    "/gt3l/sea_ice_segments/longitude,\\\n",
    "/gt3l/sea_ice_segments/heights/height_segment_confidence,\\\n",
    "/gt3l/sea_ice_segments/heights/height_segment_height,\\\n",
    "/gt3l/sea_ice_segments/heights/height_segment_quality,\\\n",
    "/gt3l/sea_ice_segments/heights/height_segment_surface_error_est'\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Select data access configurations\n",
    "\n",
    "The data request can be accessed asynchronously or synchronously. The asynchronous option will allow concurrent requests to be queued and processed without the need for a continuous connection. Those requested orders will be delivered to the specified email address, or they can be accessed programmatically as shown below. Synchronous requests will automatically download the data as soon as processing is complete. For this tutorial, we will be selecting the asynchronous method. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Set NSIDC data access base URL\n",
    "base_url = 'https://n5eil02u.ecs.nsidc.org/egi/request'\n",
    "\n",
    "for k, v in data_dict.items():\n",
    "    #Add email address\n",
    "    data_dict[k]['email'] = email\n",
    "    \n",
    "    #Set the request mode to asynchronous\n",
    "    data_dict[k]['request_mode'] = 'async'\n",
    "\n",
    "    #Set the page size to the maximum for asynchronous requests \n",
    "    page_size = 2000\n",
    "    data_dict[k]['page_size'] = page_size\n",
    "\n",
    "    #Determine number of orders needed for requests over 2000 granules. \n",
    "    page_num = math.ceil(data_dict[k]['gran_num']/page_size)\n",
    "    data_dict[k]['page_num'] = page_num\n",
    "    del data_dict[k]['gran_num']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Create the API endpoint \n",
    "\n",
    "Programmatic API requests are formatted as HTTPS URLs that contain key-value-pairs specifying the service operations that we specified above. The following command can be executed via command line, a web browser, or in Python below. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "endpoint_list = [] \n",
    "for k, v in data_dict.items():\n",
    "    param_string = '&'.join(\"{!s}={!r}\".format(k,v) for (k,v) in v.items())\n",
    "    param_string = param_string.replace(\"'\",\"\")\n",
    "    \n",
    "    #Print API base URL + request parameters\n",
    "    API_request = api_request = f'{base_url}?{param_string}'\n",
    "    endpoint_list.append(API_request)\n",
    "    if data_dict[k]['page_num'] > 1:\n",
    "        for i in range(data_dict[k]['page_num']):\n",
    "            page_val = i + 2\n",
    "            data_dict[k]['page_num'] = page_val\n",
    "            API_request = api_request = f'{base_url}?{param_string}'\n",
    "            endpoint_list.append(API_request)\n",
    "\n",
    "print(\"\\n\".join(\"\\n\"+s for s in endpoint_list))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Request data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We will now download data using the Python requests library. The data will be downloaded directly to this notebook directory in a new Outputs folder. The progress of each order will be reported."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from xml.etree import ElementTree as ET\n",
    "\n",
    "# Create an output folder if the folder does not already exist.\n",
    "path = str(os.getcwd() + '/Outputs')\n",
    "if not os.path.exists(path):\n",
    "    os.mkdir(path)\n",
    "    \n",
    "# Create session to store cookie and pass credentials to capabilities url\n",
    "session = requests.session()\n",
    "\n",
    "# Request data service for each page number, and unzip outputs\n",
    "for k, v in data_dict.items():\n",
    "    for i in range(data_dict[k]['page_num']):\n",
    "        page_val = i + 1\n",
    "        print(v['short_name'], 'Order: ', page_val)\n",
    "\n",
    "    # For all requests other than spatial file upload, use get function\n",
    "        request = session.get(base_url, params=v.items())\n",
    "        print('Request HTTP response: ', request.status_code)\n",
    "\n",
    "    # Raise bad request: Loop will stop for bad response code.\n",
    "        request.raise_for_status()\n",
    "        #print('Order request URL: ', request.url)\n",
    "        esir_root = ET.fromstring(request.content)\n",
    "        #print('Order request response XML content: ', request.content)\n",
    "\n",
    "    #Look up order ID\n",
    "        orderlist = []   \n",
    "        for order in esir_root.findall(\"./order/\"):\n",
    "            orderlist.append(order.text)\n",
    "        orderID = orderlist[0]\n",
    "        print('order ID: ', orderID)\n",
    "\n",
    "    #Create status URL\n",
    "        statusURL = base_url + '/' + orderID\n",
    "        print('status URL: ', statusURL)\n",
    "\n",
    "    #Find order status\n",
    "        request_response = session.get(statusURL)    \n",
    "        print('HTTP response from order response URL: ', request_response.status_code)\n",
    "\n",
    "    # Raise bad request: Loop will stop for bad response code.\n",
    "        request_response.raise_for_status()\n",
    "        request_root = ET.fromstring(request_response.content)\n",
    "        statuslist = []\n",
    "        for status in request_root.findall(\"./requestStatus/\"):\n",
    "            statuslist.append(status.text)\n",
    "        status = statuslist[0]\n",
    "        print('Data request ', page_val, ' is submitting...')\n",
    "        print('Initial request status is ', status)\n",
    "\n",
    "    #Continue loop while request is still processing\n",
    "        while status == 'pending' or status == 'processing': \n",
    "            print('Status is not complete. Trying again.')\n",
    "            time.sleep(10)\n",
    "            loop_response = session.get(statusURL)\n",
    "\n",
    "    # Raise bad request: Loop will stop for bad response code.\n",
    "            loop_response.raise_for_status()\n",
    "            loop_root = ET.fromstring(loop_response.content)\n",
    "\n",
    "    #find status\n",
    "            statuslist = []\n",
    "            for status in loop_root.findall(\"./requestStatus/\"):\n",
    "                statuslist.append(status.text)\n",
    "            status = statuslist[0]\n",
    "            print('Retry request status is: ', status)\n",
    "            if status == 'pending' or status == 'processing':\n",
    "                continue\n",
    "\n",
    "    #Order can either complete, complete_with_errors, or fail:\n",
    "    # Provide complete_with_errors error message:\n",
    "        if status == 'complete_with_errors' or status == 'failed':\n",
    "            messagelist = []\n",
    "            for message in loop_root.findall(\"./processInfo/\"):\n",
    "                messagelist.append(message.text)\n",
    "            print('error messages:')\n",
    "            pprint.pprint(messagelist)\n",
    "\n",
    "    # Download zipped order if status is complete or complete_with_errors\n",
    "        if status == 'complete' or status == 'complete_with_errors':\n",
    "            downloadURL = 'https://n5eil02u.ecs.nsidc.org/esir/' + orderID + '.zip'\n",
    "            print('Zip download URL: ', downloadURL)\n",
    "            print('Beginning download of zipped output...')\n",
    "            zip_response = session.get(downloadURL)\n",
    "            # Raise bad request: Loop will stop for bad response code.\n",
    "            zip_response.raise_for_status()\n",
    "            with zipfile.ZipFile(io.BytesIO(zip_response.content)) as z:\n",
    "                z.extractall(path)\n",
    "            print('Data request', page_val, 'is complete.')\n",
    "        else: print('Request failed.')\n",
    "    print()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Finally, we will clean up the Output folder by removing individual order folders:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Clean up Outputs folder by removing individual granule folders \n",
    "\n",
    "for root, dirs, files in os.walk(path, topdown=False):\n",
    "    for file in files:\n",
    "        try:\n",
    "            shutil.move(os.path.join(root, file), path)\n",
    "        except OSError:\n",
    "            pass\n",
    "    for name in dirs:\n",
    "        os.rmdir(os.path.join(root, name))    "
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
